This vignette provides an overview of how to create customizable
plots using ggplot2 while still using netify
to prepare the data.
Let's load the necessary libraries.
We’ll also use the ggnewscale package to create multiple
legends when necessary in the same plot (e.g., if you want to have
legends for a color aesthetic for both nodes and edges).
# Install ggnewscale only when it is not already available.
# requireNamespace() checks for this single package directly, which avoids
# building the full installed-package matrix that installed.packages() returns
# just to look up one name.
if (!requireNamespace('ggnewscale', quietly = TRUE)) {
  install.packages('ggnewscale', repos = 'https://cloud.r-project.org')
}
library(ggnewscale)
First, let's create a netlet object from some dyadic data
(ICEWS data) using the netify package.
# Load the icews data set into the session.
data(icews)

# Columns to attach as node-level and dyad-level attributes.
nvars <- c('i_polity2', 'i_log_gdp', 'i_log_pop')
dvars <- c('matlCoop', 'verbConf', 'matlConf')

# Build the netify object: directed (symmetric = FALSE), one network per
# 'year', with edges weighted by 'verbCoop'.
netlet <- netify(
  dyad_data = icews,
  actor1 = 'i',
  actor2 = 'j',
  time = 'year',
  symmetric = FALSE,
  weight = 'verbCoop',
  mode = 'unipartite',
  sum_dyads = FALSE,
  actor_time_uniform = TRUE,
  actor_pds = NULL,
  diag_to_NA = TRUE,
  missing_to_zero = TRUE,
  nodal_vars = nvars,
  dyad_vars = dvars
)

# Restrict the network to a smaller set of actors.
actors_to_keep <- c(
  'Australia',
  'Brazil',
  'Canada',
  'Chile',
  'China',
  'Colombia',
  'Egypt',
  'Ethiopia',
  'France',
  'Germany',
  'Ghana',
  'Hungary',
  'India',
  'Indonesia',
  'Iran, Islamic Republic Of',
  'Israel',
  'Italy',
  'Japan',
  'Kenya',
  "Korea, Democratic People's Republic Of",
  'Korea, Republic Of',
  'Nigeria',
  'Pakistan',
  'Qatar',
  'Russian Federation',
  'Saudi Arabia',
  'South Africa',
  'Spain',
  'Sudan',
  'Syrian Arab Republic',
  'Thailand',
  'United Kingdom',
  'United States',
  'Zimbabwe'
)
netlet <- subset_netlet(netlet, what_to_subset = actors_to_keep)
# print
netlet
## ✔ Hello, you have created network data, yay!
## • Longitudinal
## • Unipartite
## • Asymmetric
## • Weights from `verbCoop`
## • Longitudinal: 13 Periods
## • # Unique Row Actors: 34
## • # Unique Column Actors: 34
## • # Unique Actors: 34
## Network Summary Statistics:
## dens miss mean recip trans
## verbCoop 0.887 0 179.484 0.978 0.928
## • Nodal Features: i_polity2, i_log_gdp, i_log_pop
## • Dyad Features: matlCoop, verbConf, matlConf
## • Graph Features: None
##
## lyr_nt> # cross-sectional example
## lyr_nt> data(icews)
##
## lyr_nt> icews_10 <- icews[icews$year==2010,]
##
## lyr_nt> # generate netify objects that will be layered together
## lyr_nt> icews_verbCoop <- netify(
## lyr_nt+ dyad_data=icews_10, actor1='i', actor2='j',
## lyr_nt+ symmetric=FALSE, weight='verbCoop',
## lyr_nt+ nodal_vars=c('i_log_gdp', 'i_log_pop'),
## lyr_nt+ dyad_vars=c('verbConf') )
##
## lyr_nt> icews_matlCoop <- netify(
## lyr_nt+ dyad_data=icews_10, actor1='i', actor2='j',
## lyr_nt+ symmetric=FALSE, weight='matlCoop',
## lyr_nt+ nodal_vars='i_polity2',
## lyr_nt+ dyad_vars=c('matlConf') )
##
## lyr_nt> # layer together cross-sec netify objects together
## lyr_nt> icews_verbCoop_matlCoop <- layer_netlet(
## lyr_nt+ netlet_list=list(icews_verbCoop, icews_matlCoop),
## lyr_nt+ layer_labels=c('verbCoop', 'matlCoop') )
##
## lyr_nt> # dimensions of the multilayer network from the
## lyr_nt> # cross-sectional case will be a
## lyr_nt> # (number of actors) x (number of actors) x (number of layers)
## lyr_nt> dim(get_raw(icews_verbCoop_matlCoop))
## [1] 152 152 2
##
## lyr_nt> # longitudinal array example
## lyr_nt> icews_verbCoop_longit_a <- netify(
## lyr_nt+ dyad_data=icews, actor1='i', actor2='j', time='year',
## lyr_nt+ symmetric=FALSE, weight='verbCoop',
## lyr_nt+ nodal_vars=c('i_log_gdp', 'i_log_pop'),
## lyr_nt+ dyad_vars=c('verbConf'),
## lyr_nt+ output_format='longit_array' )
##
## lyr_nt> icews_matlCoop_longit_a <- netify(
## lyr_nt+ dyad_data=icews, actor1='i', actor2='j', time='year',
## lyr_nt+ symmetric=FALSE, weight='matlCoop',
## lyr_nt+ nodal_vars=c('i_polity2'),
## lyr_nt+ dyad_vars=c('matlConf'),
## lyr_nt+ output_format='longit_array' )
##
## lyr_nt> # layer together
## lyr_nt> icews_verbCoop_matlCoop_longit_a <- layer_netlet(
## lyr_nt+ netlet_list=list(icews_verbCoop_longit_a, icews_matlCoop_longit_a),
## lyr_nt+ layer_labels=c('verbCoop', 'matlCoop') )
##
## lyr_nt> # dimensions of the multilayer network from the
## lyr_nt> # longitudinal array case will be a
## lyr_nt> # (number of actors) x (number of actors) x (number of layers) x
## lyr_nt> # (number of time periods)
## lyr_nt> dim(get_raw(icews_verbCoop_matlCoop_longit_a))
## [1] 152 152 2 13
##
## lyr_nt> # longitudinal list example
## lyr_nt> # generate similar longitudinal list versions
## lyr_nt> icews_verbCoop_longit_l <- netify(
## lyr_nt+ dyad_data=icews, actor1='i', actor2='j', time='year',
## lyr_nt+ symmetric=FALSE, weight='verbCoop',
## lyr_nt+ nodal_vars=c('i_log_gdp', 'i_log_pop'),
## lyr_nt+ dyad_vars=c('verbConf') )
##
## lyr_nt> icews_matlCoop_longit_l <- netify(
## lyr_nt+ dyad_data=icews, actor1='i', actor2='j', time='year',
## lyr_nt+ symmetric=FALSE, weight='matlCoop',
## lyr_nt+ nodal_vars=c('i_polity2'),
## lyr_nt+ dyad_vars=c('matlConf') )
##
## lyr_nt> # layer together
## lyr_nt> icews_verbCoop_matlCoop_longit_l <- layer_netlet(
## lyr_nt+ netlet_list=list(icews_verbCoop_longit_l, icews_matlCoop_longit_l),
## lyr_nt+ layer_labels=c('verbCoop', 'matlCoop') )
##
## lyr_nt> # dimensions of the multilayer network from the
## lyr_nt> # longitudinal list case will be a
## lyr_nt> # (number of time periods) list of
## lyr_nt> # (number of actors) x (number of actors) x (number of layers) arrays
## lyr_nt> names(get_raw(icews_verbCoop_matlCoop_longit_l))
## [1] "2002" "2003" "2004" "2005" "2006" "2007" "2008" "2009" "2010" "2011"
## [11] "2012" "2013" "2014"
##
## lyr_nt> dim(get_raw(icews_verbCoop_matlCoop_longit_l)$'2010')
## [1] 152 152 2
##
## lyr_nt> # information on layer labels can be accessed
## lyr_nt> # from the `layers` attribute
## lyr_nt> attr(icews_verbCoop_matlCoop, 'layers')
## [1] "verbCoop" "matlCoop"
##
## lyr_nt> attr(icews_verbCoop_matlCoop_longit_l, 'layers')
## [1] "verbCoop" "matlCoop"
## ✔ Hello, you have created network data, yay!
## • Longitudinal
## • Unipartite
## • Multilayer
## • Asymmetric
## • Weights from `verbCoop` | Weights from `matlCoop`
## • Longitudinal: 13 Periods
## • # Unique Row Actors: 152
## • # Unique Column Actors: 152
## • # Unique Actors: 152
## Network Summary Statistics:
## dens miss mean recip trans
## verbCoop 0.418 0 19.114 0.976 0.627
## matlCoop 0.088 0 0.480 0.474 0.329
## • Nodal Features: i_log_gdp, i_log_pop, i_polity2
## • Dyad Features: verbConf, matlConf
## • Graph Features: None
This is a longitudinal, weighted network with nodal and dyadic attributes. In a few more steps we will show how to highlight these attributes in the plot.
Next, we use the net_plot_data function to create a data
frame for ggplot2. net_plot_data extracts and
sets up node and edge data from a netify object according
to specified plotting arguments. It returns a list of different
components but the most important one for users is the
net_dfs element. This element contains two objects:
edge_data and nodal_data. These are data
frames that can be passed to ggplot2.
# Turn the netify object into plotting inputs, then keep the element that
# holds the edge and node data frames.
plot_data <- net_plot_data(netlet)
net_dfs <- plot_data$net_dfs
# check structure of what's here
str(net_dfs)
## List of 2
## $ edge_data :'data.frame': 12937 obs. of 11 variables:
## ..$ from : chr [1:12937] "Australia" "Australia" "Australia" "Australia" ...
## ..$ to : chr [1:12937] "Brazil" "Brazil" "Brazil" "Brazil" ...
## ..$ time : chr [1:12937] "2002" "2003" "2004" "2005" ...
## ..$ verbCoop: num [1:12937] 3 3 24 27 54 4 26 7 12 5 ...
## ..$ matlCoop: num [1:12937] 0 1 0 0 0 0 0 0 1 0 ...
## ..$ verbConf: num [1:12937] 0 2 0 2 3 0 2 1 0 0 ...
## ..$ matlConf: num [1:12937] 0 0 2 0 1 0 0 0 0 0 ...
## ..$ x1 : num [1:12937] -3.187 -0.3978 -0.0496 -0.3853 -0.5135 ...
## ..$ y1 : num [1:12937] 2.535 -1.3386 0.0651 0.9399 1.4266 ...
## ..$ x2 : num [1:12937] -2.9515 -0.0649 -0.0429 -0.2949 -0.3298 ...
## ..$ y2 : num [1:12937] 2.315 -1.441 -0.148 1.045 1.485 ...
## $ nodal_data:'data.frame': 442 obs. of 10 variables:
## ..$ name : chr [1:442] "Australia" "Australia" "Australia" "Australia" ...
## ..$ time : chr [1:442] "2002" "2003" "2004" "2005" ...
## ..$ i_polity2 : int [1:442] 10 10 10 10 10 10 10 10 10 10 ...
## ..$ i_log_gdp : num [1:442] 27.6 27.6 27.6 27.7 27.7 ...
## ..$ i_log_pop : num [1:442] 16.8 16.8 16.8 16.8 16.8 ...
## ..$ x : num [1:442] -3.187 -0.3978 -0.0496 -0.3853 -0.5135 ...
## ..$ y : num [1:442] 2.535 -1.3386 0.0651 0.9399 1.4266 ...
## ..$ name_text : chr [1:442] "Australia" "Australia" "Australia" "Australia" ...
## ..$ name_label: chr [1:442] "Australia" "Australia" "Australia" "Australia" ...
## ..$ id : chr [1:442] "Australia_2002" "Australia_2003" "Australia_2004" "Australia_2005" ...
## from to time verbCoop matlCoop verbConf matlConf x1
## 1 Australia Brazil 2002 3 0 0 0 -3.18695808
## 2 Australia Brazil 2003 3 1 2 0 -0.39782446
## 3 Australia Brazil 2004 24 0 0 2 -0.04957632
## 4 Australia Brazil 2005 27 0 2 0 -0.38526735
## 5 Australia Brazil 2006 54 0 3 1 -0.51353531
## 6 Australia Brazil 2007 4 0 0 0 -0.69085788
## y1 x2 y2
## 1 2.53500179 -2.95152256 2.3151148
## 2 -1.33861627 -0.06486153 -1.4411348
## 3 0.06513246 -0.04293486 -0.1479501
## 4 0.93989844 -0.29488038 1.0449019
## 5 1.42660061 -0.32983771 1.4847187
## 6 1.79965809 -0.86547402 1.7542202
## name time i_polity2 i_log_gdp i_log_pop x y
## 1 Australia 2002 10 27.55492 16.78568 -3.18695808 2.53500179
## 2 Australia 2003 10 27.58556 16.79718 -0.39782446 -1.33861627
## 3 Australia 2004 10 27.62686 16.80787 -0.04957632 0.06513246
## 4 Australia 2005 10 27.65791 16.82005 -0.38526735 0.93989844
## 5 Australia 2006 10 27.68495 16.83354 -0.51353531 1.42660061
## 6 Australia 2007 10 27.72203 16.85179 -0.69085788 1.79965809
## name_text name_label id
## 1 Australia Australia Australia_2002
## 2 Australia Australia Australia_2003
## 3 Australia Australia Australia_2004
## 4 Australia Australia Australia_2005
## 5 Australia Australia Australia_2006
## 6 Australia Australia Australia_2007
The x and y in nodal_data and
the x1, y1, x2, and
y2 in edge_data are the coordinates of the
nodes and edges, respectively. These are the coordinates that will be
used to plot the network.
Now that we have the data, we can create a plot using
ggplot2. We’ll use the geom_segment and
geom_point (or, geom_label,
geom_text, and the `ggrepel` package equivalents) functions
to plot the edges and nodes, respectively.
# Assemble the network plot layer by layer: edges are added first and
# nodes second, so the points are drawn on top of the segments.
ggplot() +
# edges: one segment per row of edge_data, running from (x1, y1) to (x2, y2)
geom_segment(
data = net_dfs$edge_data,
aes(
x=x1,
y=y1,
xend=x2,
yend=y2
),
# fixed appearance (not mapped to data): faint grey lines
color='lightgrey',
alpha=.2
) +
# nodes: one point per row of nodal_data, with size and color mapped to
# the nodal attributes i_log_pop and i_polity2
geom_point(
data = net_dfs$nodal_data,
aes(
x=x,
y=y,
size=i_log_pop,
color=i_polity2
)
) +
# legend titles for the two mapped node aesthetics
labs(
color='Polity',
size='Log(Pop.)'
) +
# two-color gradient used for the i_polity2 color mapping
scale_color_gradient(low='#a6bddb', high='#014636') +
# one panel per value of time; scales='free' lets each panel use its own axis ranges
facet_wrap(~time, scales='free') +
theme_netify()
By default, layouts for node positions are drawn from the
layout_nicely algorithm in the igraph package.
Users can specify other layouts; for example, suppose you wanted to
use the mds algorithm instead:
# Rebuild the plotting data, this time requesting the 'mds' layout through
# the list passed as the second argument.
plot_data_mds <- net_plot_data(netlet, list(layout = 'mds'))
# see new x-y coordinates
lapply(plot_data_mds$net_dfs, head)
## $edge_data
## from to time verbCoop matlCoop verbConf matlConf x1
## 1 Australia Brazil 2002 3 0 0 0 -0.35485450
## 2 Australia Brazil 2003 3 1 2 0 0.20857397
## 3 Australia Brazil 2004 24 0 0 2 -0.04224981
## 4 Australia Brazil 2005 27 0 2 0 -0.06014447
## 5 Australia Brazil 2006 54 0 3 1 0.15073774
## 6 Australia Brazil 2007 4 0 0 0 -0.03161665
## y1 x2 y2
## 1 -0.16803048 -0.7281113 0.3619659
## 2 -0.12705283 0.2552975 -0.1025320
## 3 0.05123628 0.2101186 0.9327511
## 4 -0.02708750 0.4552562 0.1927606
## 5 0.46114648 0.1278155 0.3895673
## 6 -0.03885021 -0.3606698 0.8028908
##
## $nodal_data
## name time i_polity2 i_log_gdp i_log_pop x y
## 1 Australia 2002 10 27.55492 16.78568 -0.35485450 -0.16803048
## 2 Australia 2003 10 27.58556 16.79718 0.20857397 -0.12705283
## 3 Australia 2004 10 27.62686 16.80787 -0.04224981 0.05123628
## 4 Australia 2005 10 27.65791 16.82005 -0.06014447 -0.02708750
## 5 Australia 2006 10 27.68495 16.83354 0.15073774 0.46114648
## 6 Australia 2007 10 27.72203 16.85179 -0.03161665 -0.03885021
## name_text name_label id
## 1 Australia Australia Australia_2002
## 2 Australia Australia Australia_2003
## 3 Australia Australia Australia_2004
## 4 Australia Australia Australia_2005
## 5 Australia Australia Australia_2006
## 6 Australia Australia Australia_2007
So far, we have focused on using size and color to convey information about
nodal attributes in the network (population size and polity score). Now,
let’s add more edge information to the plot. For example, we can include
information about the matlConf dyadic attribute. Imagine we
want to highlight edges of verbal cooperation for which material conflict
in the same dyad is higher than the average level of material conflict in
the network that year. First, let’s create the variable in the edge data.
# Install dplyr only when it is not already available.
# requireNamespace() checks for this single package directly, which avoids
# building the full installed-package matrix that installed.packages() returns
# just to look up one name.
if (!requireNamespace('dplyr', quietly = TRUE)) {
  install.packages('dplyr', repos = 'https://cloud.r-project.org')
}
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Flag, within each time period, the edges whose matlConf exceeds that
# period's mean matlConf (missing values are dropped when computing the mean).
# The result is converted back to a plain data.frame after ungrouping.
net_dfs$edge_data <- net_dfs$edge_data |>
  group_by(time) |>
  mutate(high_matlConf = matlConf > mean(matlConf, na.rm = TRUE)) |>
  ungroup() |>
  as.data.frame()
# check
head(net_dfs$edge_data)
## from to time verbCoop matlCoop verbConf matlConf x1
## 1 Australia Brazil 2002 3 0 0 0 -3.18695808
## 2 Australia Brazil 2003 3 1 2 0 -0.39782446
## 3 Australia Brazil 2004 24 0 0 2 -0.04957632
## 4 Australia Brazil 2005 27 0 2 0 -0.38526735
## 5 Australia Brazil 2006 54 0 3 1 -0.51353531
## 6 Australia Brazil 2007 4 0 0 0 -0.69085788
## y1 x2 y2 high_matlConf
## 1 2.53500179 -2.95152256 2.3151148 FALSE
## 2 -1.33861627 -0.06486153 -1.4411348 FALSE
## 3 0.06513246 -0.04293486 -0.1479501 FALSE
## 4 0.93989844 -0.29488038 1.0449019 FALSE
## 5 1.42660061 -0.32983771 1.4847187 FALSE
## 6 1.79965809 -0.86547402 1.7542202 FALSE
Now that we have the new variable in the data.frame, we can plot by
it. Note, however, that we now need a color aesthetic for both points and
segments, whereas ggplot2 supports only one scale (and hence one legend)
per aesthetic by default. We can get around this by using the
new_scale_color function from the `ggnewscale`
package.
# color line segments by this new variable
# The plot uses two separate color scales: a manual one for the edges and a
# gradient for the nodes, separated by new_scale_color().
ggplot() +
# edges: segment color is mapped to the logical high_matlConf column
geom_segment(
data = net_dfs$edge_data,
aes(
x=x1,
y=y1,
xend=x2,
yend=y2,
color=high_matlConf
),
alpha=.2
) +
# edge color scale: two colors and two labels with an empty legend title
# NOTE(review): values/labels appear to be listed in the order FALSE, TRUE — confirm
scale_color_manual(
name='',
values=c('grey', 'red'),
labels=c('Below Avg. Matl. Conf', 'Above Avg.')
) +
# start a fresh color scale so the node layer below can map color independently
new_scale_color() +
# nodes: size mapped to i_log_pop, color mapped to i_polity2
geom_point(
data = net_dfs$nodal_data,
aes(
x=x,
y=y,
size=i_log_pop,
color=i_polity2
)
) +
# node color scale: two-color gradient with legend title 'Polity'
scale_color_gradient(
name='Polity',
low='#a6bddb', high='#014636') +
# legend title for the size aesthetic
labs(
size='Log(Pop.)'
) +
# one panel per value of time; scales='free' lets each panel use its own axis ranges
facet_wrap(~time, scales='free') +
theme_netify() +
# place the legends to the right of the panels
theme(
legend.position='right'
)
Boschee, Elizabeth; Lautenschlager, Jennifer; O’Brien, Sean; Shellman, Steve; Starz, James; Ward, Michael, 2015, “ICEWS Coded Event Data”, doi:10.7910/DVN/28075, Harvard Dataverse.
Pedersen, T. L. (2020). ggnewscale: Multiple Fill and Colour Scales in ‘ggplot2’. R package version 0.4.3. https://CRAN.R-project.org/package=ggnewscale
Wickham, H. (2016). ggplot2: Elegant Graphics for Data Analysis. Springer-Verlag New York.